2022-02-022
suppressPackageStartupMessages({
library("tidyverse")
library("reticulate")
library("ggplot2")
library("SingleCellExperiment")
library("scater")
library("Seurat")
library("SeuratDisk")
library("zellkonverter")
})
reticulate::repl_python()
Python 3.9.4 (/Users/mcgaugheyd/git/chick_analysis_collab/renv/python/virtualenvs/renv-python-3.9/bin/python)
Reticulate 1.24 REPL -- A Python interpreter in R.
Enter 'exit' or 'quit' to exit the REPL and return to R.
quit
# load('~/data/chick_miruna/08052021.RData')
# save just combined to reduce load time from Miruna's entire environment
# save(combined, file = '~/data/chick_miruna/combined_mgg_08052021.Rdata' )
load('~/data/chick_miruna/combined_mgg_08052021.Rdata')
combined
An object of class Seurat
20555 features across 17295 samples within 2 assays
Active assay: RNA (18555 features, 0 variable features)
1 other assay present: integrated
2 dimensional reductions calculated: pca, umap
color_palette <- pals::glasbey()[1:14]
names(color_palette) <- seq(0,13, 1)
reticulate::repl_python()
import scvelo as scv
import scanpy as sc
import cellrank as cr
Convert('~/data/chick_miruna/M006_adata.h5ad', dest = "h5seurat", overwrite = TRUE)
Convert('~/data/chick_miruna/M007_adata.h5ad', dest = "h5seurat", overwrite = TRUE)
Convert('~/data/chick_miruna/M008_adata.h5ad', dest = "h5seurat", overwrite = TRUE)
m006 <- LoadH5Seurat('~/data/chick_miruna/M006_adata.h5seurat')
m007 <- LoadH5Seurat('~/data/chick_miruna/M007_adata.h5seurat')
m008 <- LoadH5Seurat('~/data/chick_miruna/M008_adata.h5seurat')
m006 <- RenameCells(m006, new.names = paste0(colnames(m006), '-1_1'))
m007 <- RenameCells(m007, new.names = paste0(colnames(m007), '-1_2'))
m008 <- RenameCells(m008, new.names = paste0(colnames(m008), '-1_3'))
mAll <- merge(m006, y = c(m007, m008))
# remove cells in combined but not in mAll to facilate pca/umap/HVG transfer
cells_between <- colnames(combined)[colnames(combined) %in% colnames(mAll)]
combined_sub <- subset(combined, cells = cells_between)
mAll <- subset(mAll, cells = cells_between)
mAll@meta.data <- combined_sub@meta.data
# copy over PCA and UMAP
mAll[['integrated']] <- combined_sub[['integrated']]
mAll[['umap']] <- combined_sub[['umap']]
pca <- combined_sub@reductions$pca@cell.embeddings
mAll[['pca']] <- CreateDimReducObject(
embeddings = pca,
key = 'PC_',
assay = 'integrated',
global = TRUE
)
SaveH5Seurat(mAll, filename = "~/data/chick_miruna/mAll.h5seurat", overwrite = TRUE)
Convert("~/data/chick_miruna/mAll.h5seurat", dest = "h5ad", overwrite = TRUE)
# make sample specific adata
m006 <- subset(mAll, cluster.condition == 'Sall1 CTRL_Sall1 CTRL')
m007 <- subset(mAll, cluster.condition == 'Sall1 KO15_Sall1 KO15')
m008 <- subset(mAll, cluster.condition == 'Sall1 KO16_Sall1 KO16')
SaveH5Seurat(m006, filename = "~/data/chick_miruna/m006.h5seurat", overwrite = TRUE)
SaveH5Seurat(m007, filename = "~/data/chick_miruna/m007.h5seurat", overwrite = TRUE)
SaveH5Seurat(m008, filename = "~/data/chick_miruna/m008.h5seurat", overwrite = TRUE)
Convert("~/data/chick_miruna/m006.h5seurat", dest = "h5ad", overwrite = TRUE)
Convert("~/data/chick_miruna/m007.h5seurat", dest = "h5ad", overwrite = TRUE)
Convert("~/data/chick_miruna/m008.h5seurat", dest = "h5ad", overwrite = TRUE)
def run_scVelo(adata, adata_out, redo_pca):
adata = sc.read_h5ad(adata)
print(adata)
if redo_pca:
print("\nRunning HVG and PCA\n")
scv.pp.filter_and_normalize(adata, min_shared_counts=20, n_top_genes=10000)
sc.tl.pca(adata)
sc.pp.neighbors(adata, n_pcs=30, n_neighbors=30)
# sc.tl.umap(adata)
scv.pp.moments(adata, n_pcs=None, n_neighbors=None)
print("Recover dynamics")
scv.tl.recover_dynamics(adata, n_jobs=12)
scv.tl.velocity(adata, mode="dynamical")
scv.tl.velocity_graph(adata, n_jobs=4)
# https://github.com/theislab/scvelo/issues/255#issuecomment-739995301
adata.__dict__['_raw'].__dict__['_var'] = adata.__dict__['_raw'].__dict__['_var'].rename(columns={'_index': 'features'})
adata.write_h5ad(adata_out)
return('Done')
run_scVelo('/Users/mcgaugheyd/data/chick_miruna/m006.h5ad', '/Users/mcgaugheyd/data/chick_miruna/m006_scVelo.h5ad', True)
run_scVelo('/Users/mcgaugheyd/data/chick_miruna/m007.h5ad', '/Users/mcgaugheyd/data/chick_miruna/m007_scVelo.h5ad', True)
run_scVelo('/Users/mcgaugheyd/data/chick_miruna/m008.h5ad', '/Users/mcgaugheyd/data/chick_miruna/m008_scVelo.h5ad', True)
scv.set_figure_params()
color_palette=["#0000FF", "#FF0000", "#00FF00", "#000033", "#FF00B6", "#005300", "#FFD300", "#009FFF", "#9A4D42", "#00FFBE", "#783FC1", "#1F9698", "#FFACFD", "#B1CC71"]
def scVelo_plotting(adata_path, prefix):
adata = sc.read_h5ad(adata_path)
scv.tl.velocity_embedding(adata, basis="umap")
scv.pl.velocity_embedding_stream(adata, basis="umap", legend_fontsize=12, color='seurat_clusters', save= prefix + '.embedding_stream.png', show = False, dpi = 300, palette=color_palette)
scv.pl.velocity(adata, ['SALL1'], show = False, save = prefix + '.SALL1.png', dpi = 300, color ='seurat_clusters', palette=color_palette)
scv.tl.rank_velocity_genes(adata, groupby='seurat_clusters', min_corr=.3)
df = scv.DataFrame(adata.uns['rank_velocity_genes']['names'])
df.to_csv(prefix + '.rank_velocity.csv')
adata.var.to_csv(prefix + '.var.csv')
scv.tl.paga(adata, groups='seurat_clusters')
scv.pl.paga(adata, basis='umap', size=20, alpha=.3, color = 'seurat_clusters',
min_edge_width=2, node_size_scale=1.5, show = False,
dpi = 300, save = prefix + '.PAGA.png', palette=color_palette)
scVelo_plotting('/Users/mcgaugheyd/data/chick_miruna/m006_scVelo.h5ad', 'WT')
DimPlot(combined, label = TRUE, label.box = TRUE, split.by = 'cluster.condition', pt.size = 1) + scale_color_manual(values = color_palette) + scale_fill_manual(values = alpha(rep('white', 14), 0.9))
Scale for 'fill' is already present. Adding another scale for 'fill', which will replace the existing scale.
DimPlot(combined, split.by = 'cluster.condition',group.by = 'Phase' ) + scale_color_manual(values = pals::brewer.set2(5) %>% unname())
WT, G15, then G16
Velocity is the relationship between unspliced (the “future”) and spliced (the “present”) mRNA. If there is a higher ratio of unspliced to spliced of a transcript, then presumably in the future the cell will be splicing and using more of the that transcript. Vice versa for higher ratios of spliced / unspliced. By linking nearby cells (graph) and the velocities you can also infer direction.
knitr::include_graphics("figures/scvelo_WT.embedding_stream.png")
knitr::include_graphics("figures/scvelo_G15.embedding_stream.png")
knitr::include_graphics("figures/scvelo_G16.embedding_stream.png")
You can take each cluster and identify genes whose velocity differs between them
velo %>% mutate(Cluster = as.integer(Cluster)) %>% group_by(Cluster, Gene) %>% summarise(`Number of Samples with Diff Velocity` = n(), `Samples with Diff Velocity` = paste(Sample, collapse = ', ')) %>% DT::datatable(filter = 'top', options = list(
pageLength = 20, autoWidth = TRUE))
`summarise()` has grouped output by 'Cluster'. You can override using the `.groups` argument.
gene = 'DPYSL3'
for prefix in ['WT', 'G15','G16']:
print(prefix)
if prefix == 'WT':
file = '/Users/mcgaugheyd/data/chick_miruna/m006_scVelo.h5ad'
if prefix == 'G15':
file = '/Users/mcgaugheyd/data/chick_miruna/m007_scVelo.h5ad'
if prefix == 'G16':
file = '/Users/mcgaugheyd/data/chick_miruna/m008_scVelo.h5ad'
adata = sc.read_h5ad(file)
scv.pl.velocity(adata, [gene], show = False, save = prefix + '.' + gene + '.png', dpi = 300, color ='seurat_clusters', palette=color_palette)
WT
saving figure to file ./figures/scvelo_WT.DPYSL3.png
<AxesSubplot:title={'center':'expression'}>
G15
saving figure to file ./figures/scvelo_G15.DPYSL3.png
<AxesSubplot:title={'center':'expression'}>
G16
saving figure to file ./figures/scvelo_G16.DPYSL3.png
<AxesSubplot:title={'center':'expression'}>
DimPlot(combined, label = TRUE, label.box = TRUE, split.by = 'cluster.condition', pt.size = 1) + scale_color_manual(values = color_palette) + scale_fill_manual(values = alpha(rep('white', 14), 0.9))
Scale for 'fill' is already present. Adding another scale for 'fill', which will replace the existing scale.
DPLYSL3 velocity drops in cluster 2 in the WT, but is maintained in the G15/G16. We also see reduced velocity in the G15/G16 in the top right branch, while the WT maintains the velocity.
DPYSL3 has a role in migration and cell cycle
WT, G15, then G16
No huge differences in velocity characteristics. Quick viz explainer for velocity: